# Setup ----
# Attach each package once. janitor is not attached; it is called below
# through its namespace (`janitor::`).
library(tidyverse)
library(here)
library(sf)
library(tmap)

# Read the San Francisco tree records from data/sfo_trees/sfo_trees.csv
# (path built with here()) and pass them through janitor::clean_names(),
# so the columns can be referenced by the snake_case names used below
# (e.g. `legal_status`).
sfo_trees_df <- read.csv(here("data", "sfo_trees", "sfo_trees.csv")) %>%
  janitor::clean_names()

# Top 5 legal statuses ----
# Count the trees in each legal status, ignoring rows whose status is
# missing, then keep the five largest counts in descending order.
top_5_status <- sfo_trees_df %>%
  filter(!is.na(legal_status)) %>%
  ### drop_na(legal_status) %>% ### same thing!
  group_by(legal_status) %>%
  summarise(tree_count = n()) %>%
  ungroup() %>%
  slice_max(tree_count, n = 5) %>%
  arrange(-tree_count)

# Column chart with one bar per legal status, bar height = tree count.
top_5_plot <- ggplot(top_5_status, aes(x = legal_status, y = tree_count)) +
  geom_col() +
  labs(
    y = "Tree Count",
    x = "Legal Status",
    title = "Top 5 legal status of SFO trees"
  ) +
  theme_bw()
top_5_plot

# Results are in Figure 1.
# Figure 1
# Trees whose legal status is "Permitted Site" and whose caretaker is
# either MTA or DPW.
permitted_mta_dpw <- sfo_trees_df %>%
  filter(legal_status == "Permitted Site" & caretaker %in% c("MTA", "DPW"))

# Every tree whose species text contains "Oak" or "Pine", reduced to the
# columns needed for mapping, plus a `type` label.
oak_pine_df <- sfo_trees_df %>%
  filter(str_detect(species, "Oak") | str_detect(species, "Pine")) %>%
  select(species, legal_status, plant_date, latitude, longitude) %>%
  mutate(
    # "Oak" is tested first, so a species string matching both patterns
    # is labelled "Oak".
    type = ifelse(str_detect(species, "Oak"), "Oak", "Pine")
  )

# Scatter of tree positions (longitude on x, latitude on y), coloured by
# tree type; axis titles are suppressed.
oak_pine_plot <- ggplot(
  oak_pine_df,
  aes(y = latitude, x = longitude, colour = type)
) +
  geom_point() +
  theme_bw() +
  theme(axis.title = element_blank()) +
  labs(
    color = "Tree Type",
    title = " Locations of Oak and Pine Trees in SF"
  )
oak_pine_plot

# Load a list of CA native species.
# Read the California native species list from
# data/sfo_trees/ca_native_spp.csv (path built with here()).
ca_native_df <- read_csv(here("data", "sfo_trees", "ca_native_spp.csv"))

# How can we compare the California native species to those in our SF trees
# data? Add a column that notes whether each tree is a CA native or not, and
# save the result as sfo_trees_native (include species info, legal status,
# plant date, and location). Then count how many native vs. non-native trees
# there are in each legal status category, and save that as
# sfo_native_status.
# Extension: include how many individual species are in each category as
# well!
# ca_native_df <- ca_native_df %>%
# mutate(species = paste(scientific_name, " :: ", str_to_title(common_name)))
# sfo_trees_native <- sfo_trees_df %>%
# select(species, legal_status, plant_date, address, latitude, longitude) %>%
# full_join(ca_native_df)
# One row per tree: `species` is split on " :: " into a scientific name
# (`spp_sci`) and a common name (`spp_common`), and `ca_native` flags trees
# whose scientific name appears in ca_native_df$scientific_name. Only the
# species columns, plant date, legal status, coordinates and the flag are
# kept.
sfo_trees_native <- sfo_trees_df %>%
  separate(
    col = species,
    into = c("spp_sci", "spp_common"),
    sep = " :: "
  ) %>%
  mutate(ca_native = spp_sci %in% ca_native_df$scientific_name) %>%
  select(
    starts_with("spp"),
    plant_date,
    legal_status,
    longitude,
    latitude,
    ca_native
  )
# For every combination of legal status and native flag, count the trees
# and the number of distinct scientific names.
sfo_native_status <- sfo_trees_native %>%
  group_by(legal_status, ca_native) %>%
  summarize(
    n_trees = n(),
    n_species = n_distinct(spp_sci),
    # Return an ungrouped table; by default the result would stay grouped
    # by legal_status and affect any later verbs applied to it.
    .groups = "drop"
  )

# Considering only Coast Live Oak and Monterey Pine, have tree planting
# preferences changed over time?
# Create a new dataframe that contains only Coast Live Oak and Monterey Pine
# observations (NOT all oaks and pines!), and include information on year
# and location. Call this oak_pine_year_df.
# Then, determine whether there is a difference in when trees have been
# planted.
oak_pine_year_df <- sfo_trees_native %>%
  # Match on the scientific names so that other oaks and pines are excluded.
  filter(spp_sci %in% c("Quercus agrifolia", "Pinus radiata")) %>%
  # year() is called through its namespace so this line does not depend on
  # lubridate having been attached by library(tidyverse).
  # NOTE(review): plant_date comes from read.csv(), so it is presumably a
  # character column that year() converts implicitly -- confirm the format.
  mutate(plant_year = lubridate::year(plant_date))
# Two-sample t-test comparing planting year between the two species.
# The output recorded from the original run is kept below as comments.
t.test(plant_year ~ spp_sci, data = oak_pine_year_df)
#>
#>  Welch Two Sample t-test
#>
#> data: plant_year by spp_sci
#> t = 4.2553, df = 346.94, p-value = 2.69e-05
#> alternative hypothesis: true difference in means between group Pinus radiata and group Quercus agrifolia is not equal to 0
#> 95 percent confidence interval:
#>  1.855054 5.043743
#> sample estimates:
#>     mean in group Pinus radiata mean in group Quercus agrifolia
#>                        2010.552                        2007.102
# Histogram of planting year (10 bins), one stacked panel per species.
ggplot(oak_pine_year_df) +
  geom_histogram(aes(x = plant_year), bins = 10) +
  facet_wrap(~ spp_sci, ncol = 1) +
  theme_minimal()

# Tree positions coloured by planting year, with point shape by species.
ggplot(oak_pine_year_df) +
  geom_point(
    aes(x = longitude, y = latitude, color = plant_year, shape = spp_sci)
  )

# Drop rows without coordinates, then build point geometries from the
# longitude/latitude columns. No CRS is attached at this step.
oak_pine_sf <- oak_pine_year_df %>%
  drop_na(longitude, latitude) %>%
  st_as_sf(coords = c("longitude", "latitude")) # Convert to spatial coordinates
# The points still lack a coordinate reference system (CRS). Assign
# EPSG:4326 so they are compatible with the San Francisco street map that
# will serve as the "base layer".
oak_pine_sf <- st_set_crs(oak_pine_sf, 4326)
# Then we can use `geom_sf`!
# Plot the tree points, coloured by scientific name.
ggplot(data = oak_pine_sf) +
  geom_sf(aes(color = spp_sci)) +
  theme_minimal()

# Read the road shapefile from data/sfo_map/tl_2017_06075_roads.shp
# (path built with here()).
sfo_map <- read_sf(here("data", "sfo_map", "tl_2017_06075_roads.shp"))
# Reproject the road layer to EPSG:4326, the CRS assigned to the tree
# points. st_transform() returns a new object, so the result must be
# assigned back; calling it without assignment leaves sfo_map unchanged.
sfo_map <- st_transform(sfo_map, 4326)

# Print the reprojected layer. The output recorded from the original run is
# kept below as comments.
sfo_map
#> Simple feature collection with 4087 features and 4 fields
#> Geometry type: LINESTRING
#> Dimension: XY
#> Bounding box: xmin: -122.5136 ymin: 37.70813 xmax: -122.3496 ymax: 37.83213
#> Geodetic CRS: WGS 84
#> # A tibble: 4,087 × 5
#> LINEARID FULLNAME RTTYP MTFCC geometry
#> * <chr> <chr> <chr> <chr> <LINESTRING [°]>
#> 1 110498938773 Hwy 101 S Off Rmp M S1400 (-122.4041 37.74842, -122.404 …
#> 2 110498937425 Hwy 101 N on Rmp M S1400 (-122.4744 37.80691, -122.4746…
#> 3 1103660229533 Ludlow Aly - No Acc M S1780 (-122.4596 37.73853, -122.4596…
#> 4 1106081811301 Mission Bay Blvd N M S1400 (-122.3946 37.77082, -122.3929…
#> 5 1103666896385 25th Ave N M S1400 (-122.4858 37.78953, -122.4855…
#> 6 1103689702566 Willard N M S1400 (-122.457 37.77817, -122.457 3…
#> 7 1103689702762 25th Ave N M S1400 (-122.4858 37.78953, -122.4858…
#> 8 110498933806 Avenue N M S1400 (-122.3643 37.81947, -122.3638…
#> 9 1103689702763 25th Ave N M S1400 (-122.4854 37.78983, -122.4858…
#> 10 1103677491290 Mission Bay Blvd S M S1400 (-122.3865 37.77086, -122.3878…
#> # ℹ 4,077 more rows
# Road network on its own.
ggplot(data = sfo_map) +
  geom_sf()

# Static map: roads as a dark gray base layer with the oak and pine points
# drawn on top, coloured by scientific name.
ggplot() +
  geom_sf(
    data = sfo_map,
    # NOTE(review): on recent ggplot2 releases line width is controlled by
    # `linewidth`; confirm that `size` still thins the road lines here.
    size = 0.1,
    color = "darkgray"
  ) +
  geom_sf(
    data = oak_pine_sf,
    aes(color = spp_sci),
    size = 0.5
  ) +
  theme_void() +
  labs(title = "Oaks and pines in San Francisco")

# Switch tmap to "view" mode, then draw the tree points as dots coloured by
# scientific name.
tmap_mode("view")
tm_shape(oak_pine_sf) +
  tm_dots(col = "spp_sci")